home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 22
/
Cream of the Crop 22.iso
/
program
/
cgazv4n2.zip
/
SOUNDEX4.C
< prev
next >
Wrap
C/C++ Source or Header
|
1989-10-26
|
5KB
|
153 lines
/************************** SOUNDEX4.C *************************
* Optimized Soundex Algorithm (Algorithm #4)
* Author: Joe Celko
* Compilers: Turbo C 2.0, Microsoft C 5.0
*
* Compile time switches:
* TEST to get a test driver
*
* Source code may be freely used if source is acknowledged
* Object code may be freely used
*/
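/* Preserves the first character, uppercases the name, rewrites letter
* groups that sound alike, strips vowels and repeated letters, and
* returns the first n characters of the result. The code is made of
* letters (not classic Soundex digits), and non-alphabetic characters
* are passed through rather than dropped.
* Many of the steps here could be combined into the same loop,
* but they are kept separate for clarity and to give the user
* a chance to experiment with changes.
*/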
#define TEST 100
#include <ctype.h>
#include <string.h>
/* Size of the working copy of a name, terminator included. */
enum { SX4_WORK_SIZE = 100 };

/* One letter-group substitution; `from` and `to` have the same length. */
struct sx4_rule {
    const char *from;
    const char *to;
};

/* Rewrite every occurrence of rule->from found at or after `start`.
 * The search restarts at `start` after each rewrite because a rewrite can
 * create a new match to its left (e.g. "DDG" -> "DGG" -> "GGG").
 * Every rule strictly reduces the count of some letter, so this terminates.
 */
static void sx4_apply_rule(char *start, const struct sx4_rule *rule)
{
    size_t width = strlen(rule->to);
    char *hit;

    while ((hit = strstr(start, rule->from)) != NULL) {
        memcpy(hit, rule->to, width);
    }
}

/* Compute the letter-based "Soundex 4" code of a name.
 *
 * inname  - NUL-terminated name; only its first 99 characters are used.
 *           NULL is treated as an empty name.
 * outcode - receives the code; must have room for n + 1 characters.
 *           If NULL, the function does nothing.
 * n       - maximum number of characters of the code to store; negative
 *           values are treated as 0.
 *
 * Steps, in order: uppercase, map all vowels (AEIOUY) to 'A', apply one
 * prefix rule, apply the infix rules to everything after the first
 * character, trim/adjust the suffix, drop every 'A' after the first
 * character, collapse runs of identical letters, then truncate to n.
 * Non-alphabetic characters are not removed.
 */
void soundex4 (char *inname, /* name to be transformed */
               char *outcode, /* where to put soundex code */
               int n /* length of soundex code */
              )
{
    /* Infix rules, applied in this order. Vowels are already 'A' here,
     * which is why the patterns only ever mention 'A'.
     * NOTE: the NST rule yields NSS; an older comment said "NTT", but the
     * code has always produced NSS and that behaviour is kept.
     */
    static const struct sx4_rule infix_rules[] = {
        { "DG",   "GG"   },
        { "CAAN", "TAAN" },
        { "D",    "T"    },
        { "NST",  "NSS"  },
        { "AV",   "AF"   },
        { "Q",    "G"    },
        { "Z",    "S"    },
        { "M",    "N"    },
        { "KN",   "NN"   },
        { "K",    "C"    },
        { "AH",   "AA"   },
        { "HA",   "AA"   },
        { "AW",   "AA"   },
        { "PH",   "FF"   },
        { "SCH",  "SSS"  }
    };
    char workbuf[SX4_WORK_SIZE];
    char *src, *dst, *infix;
    char prior;
    size_t len, i;

    if (outcode == NULL) {
        return;
    }

    /* make an uppercased working copy, truncated to fit workbuf */
    len = 0;
    if (inname != NULL) {
        while (len < sizeof workbuf - 1 && inname[len] != '\0') {
            workbuf[len] = (char)toupper((unsigned char)inname[len]);
            len++;
        }
    }
    workbuf[len] = '\0';

    /* convert all vowels to A */
    for (src = workbuf; *src != '\0'; src++) {
        if (strchr("AEIOUY", *src) != NULL) {
            *src = 'A';
        }
    }

    /* prefix transformations: at most one, on the front of the name */
    if (strncmp(workbuf, "MAC", 3) == 0) {          /* MAC to MCC */
        workbuf[1] = 'C';
    } else if (strncmp(workbuf, "KN", 2) == 0) {    /* KN to NN */
        workbuf[0] = 'N';
    } else if (workbuf[0] == 'K') {                 /* K to C */
        workbuf[0] = 'C';
    } else if (strncmp(workbuf, "PF", 2) == 0) {    /* PF to FF */
        workbuf[0] = 'F';
    } else if (strncmp(workbuf, "SCH", 3) == 0) {   /* SCH to SSS */
        workbuf[1] = 'S';
        workbuf[2] = 'S';
    }

    /* infix transformations: matches must start after the first letter.
     * Searching from workbuf + 1 (instead of comparing the hit against
     * workbuf) keeps a match at index 0 from ending the scan early.
     */
    infix = (workbuf[0] != '\0') ? workbuf + 1 : workbuf;
    for (i = 0; i < sizeof infix_rules / sizeof infix_rules[0]; i++) {
        sx4_apply_rule(infix, &infix_rules[i]);
    }

    /* suffix transformations; all rewrites so far preserved the length */
    /* (1) remove terminal A's and S's, never touching the first letter */
    while (len > 1 && (workbuf[len - 1] == 'S' || workbuf[len - 1] == 'A')) {
        workbuf[--len] = '\0';
    }
    /* (2) terminal NT to TT, repeated leftwards so "NNT" becomes "TTT" */
    for (i = len; i > 1 && workbuf[i - 2] == 'N' && workbuf[i - 1] == 'T'; i--) {
        workbuf[i - 2] = 'T';
    }

    /* strip every 'A' (i.e. every vowel) except one in first position */
    if (workbuf[0] != '\0') {
        dst = workbuf + 1;
        for (src = workbuf + 1; *src != '\0'; src++) {
            if (*src != 'A') {
                *dst++ = *src;
            }
        }
        *dst = '\0';
    }

    /* collapse runs of identical letters, including at the front, since
     * the transformations above can create duplicates there
     */
    dst = workbuf;
    prior = '\0';   /* can never equal a character inside the string */
    for (src = workbuf; *src != '\0'; src++) {
        if (*src != prior) {
            prior = *src;
            *dst++ = *src;
        }
    }
    *dst = '\0';

    /* hand back at most n characters, always NUL-terminated */
    len = strlen(workbuf);
    if (n < 0) {
        n = 0;
    }
    if (len > (size_t)n) {
        len = (size_t)n;
    }
    memcpy(outcode, workbuf, len);
    outcode[len] = '\0';
}
#if defined (TEST)
#include <stdio.h>
#include <stdlib.h>
/* Test driver: soundex4 <name> <length>
 * Prints the code of <name>, truncated to <length> characters.
 * <length> must be a non-negative decimal integer; values larger than the
 * output buffer can hold are clamped so soundex4 never writes past it.
 * Returns EXIT_FAILURE on a usage or argument error, 0 otherwise.
 */
int main(int argc, char **argv)
{
    char outbuf[50];
    char *end;
    long wanted;

    if (argc != 3) {
        puts("Usage: soundex4 name length\n");
        return EXIT_FAILURE;
    }

    /* strtol instead of atoi so that garbage input can be detected */
    wanted = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || wanted < 0) {
        fprintf(stderr, "soundex4: length must be a non-negative integer\n");
        return EXIT_FAILURE;
    }

    /* soundex4 stores up to n characters plus a terminator */
    if ((size_t)wanted > sizeof outbuf - 1) {
        wanted = (long)(sizeof outbuf - 1);
    }

    soundex4(argv[1], outbuf, (int)wanted);
    printf(" Result: %s\n", outbuf);
    return 0;
}
#endif